//  Date:         08/07/2018
//  task:         education analysis (Anderson)
//  project:      World Development

//  #0:  setup
//  Begin from a clean session. `capture' swallows the error raised by
//  `log close' when no log is currently open.
capture log close
clear all
macro drop _all

//  Session settings (independent of one another).
set matsize 800
set more off

//  Open a plain-text log, overwriting the one left by any previous run.
log using "Analysis_school_Anderson", text replace

//  #1: load data
use "ubridge_schools_long.dta"

//  #2: define covariates
//  `controls'   : covariates, each fully interacted with treat.
//  `indicators' : the *_miss flag variables that accompany them.
//  Both macros are reused by the regression loop further down.

	* Earlier, longer control set (disabled, kept for reference):
	*local controls c.c_lpop##treat c.c_age##treat c.c_poverty_census##treat c.c_lugbara_share##treat c.c_polarization##treat ///
	*c.c_HHI_Religion##treat c.c_literacy##treat c.c_education##treat c.c_employed_share##treat c.c_Nonagriculture_share##treat c.c_Arua_dist##treat
	local controls c.c_age##treat c.c_lugbara_share##treat c.c_Arua_dist##treat
	local indicators lugbara_share_miss Arua_dist_miss

	* Descriptive checks written to the log.
	summarize `controls'
	tab1 `indicators'

	* Swap each baseline index for the c_-prefixed variable produced by
	* -center-, keeping the original variable name.
	* (-center- is not an official Stata command; it must be installed.)
	foreach idx in monitoring_index1 effort_index1 input_index1 outcome_index1 {
		center `idx', replace
		drop `idx'
		rename c_`idx' `idx'
	}
	
//  #3: define outcomes
//  One macro per outcome family: the family index first, then its
//  components. Entries are variable-name stems; the regression loop
//  appends 1, 2 or 3 to each stem.

local edu_monitoring index_edu_monitoring ///
    c_V_deo_ever_call c_V_insp_calls c_V_deo_visit_rec c_V_isp_visit_rec

local edu_effort index_edu_effort ///
    c_Teacher_Absent c_V_present_teach_e c_V_perc_alotwritten ///
    c_V_perc_Engaged c_V_school_staff_meet

local edu_input index_edu_input ///
    c_V_n_teachers c_V_teach_transf_to ///
    c_students_supply1 c_students_supply2 c_students_supply3

local edu_outcome index_edu_outcome ///
    c_A_enrollment c_A_PLE_Grade1rate c_A_PLE_Grade2rate c_A_PLE_passrate

********************************************************************************
*REGRESSIONS
********************************************************************************

// Estimate eight OLS specifications for every outcome stem `y' and store
// each one under a name that the table code below picks up:
//   `y'_m1 .. `y'_m4 : dependent variable `y'2 (short-term table)
//   `y'_e1 .. `y'_e4 : dependent variable `y'3 (long-term table)
// Specs 1-2 include the baseline measure `y'1 interacted with treat;
// specs 3-4 omit it. Specs 2 and 4 add `controls' and `indicators'.
//
// Regressions are best-effort (`capture noisily'): a missing variable is
// reported in the log but does not stop the run. Results are stored ONLY
// when the regression succeeded (_rc == 0). Otherwise e() would still hold
// the previous model, which would be stored silently under the wrong name.
foreach y in `edu_monitoring' `edu_effort' `edu_input' `edu_outcome' { 
	capture noisily reg `y'2 treat c.`y'1##treat i.schooltype
	if _rc == 0 {
		capture est sto `y'_m1
	}

	capture noisily reg `y'2 treat c.`y'1##treat `controls' `indicators' i.schooltype
	if _rc == 0 {
		capture est sto `y'_m2
	}

	capture noisily reg `y'3 treat c.`y'1##treat i.schooltype
	if _rc == 0 {
		capture est sto `y'_e1
	}

	capture noisily reg `y'3 treat c.`y'1##treat `controls' `indicators' i.schooltype
	if _rc == 0 {
		capture est sto `y'_e2
	}
	
***********************	
* regressions without baseline measure of the DV
***********************
	capture noisily reg `y'2 treat i.schooltype
	if _rc == 0 {
		capture est sto `y'_m3
	}
	
	capture noisily reg `y'2 treat `controls' `indicators' i.schooltype
	if _rc == 0 {
		capture est sto `y'_m4
	}

	capture noisily reg `y'3 treat i.schooltype
	if _rc == 0 {
		capture est sto `y'_e3
	}
	
	capture noisily reg `y'3 treat `controls' `indicators' i.schooltype
	if _rc == 0 {
		// estadd is guarded as well, so it cannot tag a stale model
		capture estadd local control "yes"
		capture est sto `y'_e4
	}
	}	


*************************
* Short-term tbl
*************************

// For each specification c = 1..4:
//   1. combine the stored `_m`c'' models family by family with -suest-
//      (SEs clustered on cluster_id) and collect the treat rows in `short';
//   2. do the same for the stored `_e`c'' models, collected in `long';
//   3. merge the two by outcome name, build CI strings and convert the
//      p-values, and save the result as edu_Anderson`c'.
// Local macro names are left untouched on purpose: the -include-d file near
// the end runs in this file's local scope and may read them.
forval c = 1/4  {
	// NOTE(review): unlike the long-term list below, `two' has no
	// c_V_school_staff_meet_m`c' entry -- confirm this is intentional.
	local one index_edu_monitoring_m`c' c_V_deo_ever_call_m`c' c_V_insp_calls_m`c' c_V_deo_visit_rec_m`c' c_V_isp_visit_rec_m`c' 
	local two index_edu_effort_m`c' c_Teacher_Absent_m`c' c_V_present_teach_e_m`c' c_V_perc_alotwritten_m`c' c_V_perc_Engaged_m`c'	
	local three index_edu_input_m`c' c_V_n_teachers_m`c' c_V_teach_transf_to_m`c' c_students_supply1_m`c' c_students_supply2_m`c' c_students_supply3_m`c' 
	local four index_edu_outcome_m`c' c_A_enrollment_m`c' c_A_PLE_Grade1rate_m`c' c_A_PLE_Grade2rate_m`c' c_A_PLE_passrate_m`c'
	
	tempfile short
	local n = 0
	foreach i in one two three four {

		suest ``i'' , vce(cluster cluster_id)
	
		// the steps below overwrite the data in memory; the school data
		// are brought back by -restore- before the next -suest-
		preserve

			// regsave is presumably user-written as well -- TODO confirm it is installed
			regsave , pval ci table(e1, parentheses(stderr) format(%8.3fc)) 
			gen grp = `n'
			// keep only the rows describing the coefficient on treat
			keep if regexm(var , ":treat_coef") ///
				| regexm(var , ":treat_stderr") ///
				| regexm(var , ":treat_pval") ///
				| regexm(var , ":treat_ci_upper") ///
				| regexm(var , ":treat_ci_lower")
			
			// split the row label: `stat' = text after ":", `var' = outcome
			// name without the "_m`c'_mean" suffix and leading "c_"
			replace var = subinstr(var , "treat_" , "" , .)
			gen stat = substr(var , strpos(var , ":") + 1 , .)
			replace var = substr(var , 1 , strpos(var , "_m`c'_mean") - 1)
			replace var = substr(var , 3 , .) if regexm(var , "^c_")
			
			// one row per outcome, one column per statistic; S = short-term
			gen order = _n
			reshape wide e1 order , i(var) j(stat) string
			renvars e1* , prefix(S) // renvars is not installed in Stata by default
			
			// stack this family on top of the families processed so far
			if `n' > 0 {
				append using `short'
				}
			
			save `short' , replace
		
		restore
		local ++n
		}

******************	
* long-term tbl
****************** 

	local one index_edu_monitoring_e`c' c_V_deo_ever_call_e`c' c_V_insp_calls_e`c' c_V_deo_visit_rec_e`c' c_V_isp_visit_rec_e`c' 
	local two index_edu_effort_e`c' c_Teacher_Absent_e`c' c_V_present_teach_e_e`c' c_V_perc_alotwritten_e`c' c_V_perc_Engaged_e`c' c_V_school_staff_meet_e`c'	
	local three index_edu_input_e`c' c_V_n_teachers_e`c' c_V_teach_transf_to_e`c' c_students_supply1_e`c' c_students_supply2_e`c' c_students_supply3_e`c' 
	// the outcome family is disabled for the long-term table
	*local four outcome_index_e`c' c_A_enrollment_e`c' c_A_PLE_Grade1rate_e`c' c_A_PLE_Grade2rate_e`c' c_A_PLE_passrate_e`c'

		tempfile long
		local n = 0
		foreach i in one two three {
		
			suest ``i'' , vce(cluster cluster_id)
			
			preserve
		
				regsave , pval ci table(e1, parentheses(stderr) format(%8.3fc)) 
		
				gen grp = `n'
				keep if regexm(var , ":treat_coef") ///
					| regexm(var , ":treat_stderr") ///
					| regexm(var , ":treat_pval") ///
					| regexm(var , ":treat_ci_upper") ///
					| regexm(var , ":treat_ci_lower")
					
				replace var = subinstr(var , "treat_" , "" , .)
				gen stat = substr(var , strpos(var , ":") + 1 , .)
				replace var = substr(var , 1 , strpos(var , "_e`c'_mean") - 1)
				replace var = substr(var , 3 , .) if regexm(var , "^c_")
	
				// L = long-term
				gen order = _n
				reshape wide e1 order , i(var) j(stat) string
				renvars e1* , prefix(L)

				if `n' > 0 {
					append using `long'
					}
			
				save `long' , replace
			
			restore
			local ++n
			}

	preserve
		// `clear' is the documented option for replacing the data in memory
		use `short' , clear
		merge 1:1 var using `long'
		
		foreach k in S L {
			// confidence interval as a single "[lower , upper]" string
			gen `k'_ci = "[" + `k'e1ci_lower + " , " + `k'e1ci_upper + "]" 
			replace `k'_ci = "" if missing(`k'e1ci_lower)

			// turn the reported p-value into a one-sided one: p/2 when the
			// coefficient is non-negative, 1 - p/2 when it is negative
			destring `k'e1pval , replace
			replace `k'e1pval = `k'e1pval/2 ///
				if real(`k'e1coef) >= 0 & !missing(`k'e1pval)
			replace `k'e1pval = 1-(`k'e1pval/2) ///
				if real(`k'e1coef) < 0
			tostring `k'e1pval , replace format(%12.3f) force
			replace `k'e1pval = "" if `k'e1pval == "."

			}
		drop *lower *upper
		
		tempfile combine`c'
		save `combine`c'' , replace
	restore
	
	preserve	
		use `combine`c'' , clear
		compress

		// family order, then row order within family
		gsort grp ordercoef -_merge
		// external do-file, not visible from here; runs in this local scope
		include output3_replace_edit.do
		drop order* grp _merge
		order var Se1coef Se1stderr S_ci Se1pval  Le1coef Le1stderr L_ci Le1pval 
		saveold edu_Anderson`c', replace
	restore
	}
	
// create tables:
// Print the saved result files for specifications 1 and 2 to the log,
// leaving the data in memory untouched (preserve/restore).
//
//   edu_Anderson1  ->  Table 12 in Appendix
//   edu_Anderson2  ->  Table 13 in Appendix
foreach spec of numlist 1 2 {
	preserve
		use edu_Anderson`spec', clear
		list, sep(5)
	restore
}

*************************************************************
* Randomization Inference
*************************************************************

// For each wave i (2, 3) and each family index `y':
//   1. estimate the observed coefficient on treat;
//   2. run -ritest- (5000 permutations of treat, clustered on cluster_id)
//      on the SAME model, saving the permuted coefficients to "`y'`i'A";
//   3. add to that file the observed coefficient (t) and the share of
//      permuted coefficients that are >= t (p).
// The figures that follow read t, p and _pm_1 from these files.

clear
set more off
use ubridge_schools_long.dta

// Constant placeholder so the i = 3 pass of the loop can run for the
// outcome index; the long-term figure below does not use the resulting file.
gen index_edu_outcome3=1
	foreach y in monitoring_index1 effort_index1 input_index1 outcome_index1 {
	center `y', replace
	drop `y'
	ren c_`y' `y'
	}

forvalues i=2(1)3{
foreach y in index_edu_monitoring index_edu_effort index_edu_input index_edu_outcome {
	// observed treatment coefficient, read directly from the fit (no
	// dependence on how many columns e(b) happens to have)
	regress `y'`i' treat c.`y'1##treat i.schooltype
	local b`y'`i' = _b[treat]

	// permutation distribution for the same wave-`i' model
	// ritest is not installed in Stata by default
	ritest treat _b[ treat ], cluster(cluster_id) reps(5000) right seed(05042018) saving("`y'`i'A", replace): regress `y'`i' treat c.`y'1##treat i.schooltype, cluster(cluster_id)
	
	preserve
			use "`y'`i'A", clear
			gen t= `b`y'`i''
			// p = share of permuted coefficients at least as large as t
			gen c=0
			replace c=1 if _pm_1 >= t
			egen p = mean(c)
			saveold "`y'`i'A", replace
	restore
	}	
	}
	
****************************************		
graph drop _all

*************************************************************
* Figure 2 in Appendix
*************************************************************
* One histogram per index of the permuted coefficients (_pm_1), with a red
* vertical line at the observed coefficient and labels for the coefficient
* and the p-value, both rounded to three decimals.

* Short-term: files index_edu_<family>2A.dta, graphs named <family>2
preserve
foreach fam in monitoring effort input outcome {
	use index_edu_`fam'2A.dta, clear

	quietly summarize t, detail
	local tau = round(r(mean), .001)
	quietly summarize p, detail
	local pv = round(r(mean), .001)

	* NOTE(review): place(n2) does not look like a standard placement
	* value -- confirm whether n or ne was intended.
	histogram _pm_1 , kdensity lcolor() ///
		title(Education `fam' (year-1)) ///
		addplot(pci 0 `tau' 2.5 `tau', lc(red) lp(solid) lw(thick)) ///
		text(0.5 .4 "pval=`pv'", place(ne) size(small)) ///
		text(2 `tau' "{&tau}=`tau'", place(n2) size(small)) ///
		name(`fam'2) xlabel(-.6(.1).6) xsca(r(-.6 .6)) legend(off)
}
restore

* Long-term: files index_edu_<family>3A.dta, graphs named <family>3
preserve
foreach fam in monitoring effort input {
	use index_edu_`fam'3A.dta, clear

	quietly summarize t, detail
	local tau = round(r(mean), .001)
	quietly summarize p, detail
	local pv = round(r(mean), .001)

	histogram _pm_1 , kdensity lcolor() ///
		title(Education `fam' (year-2)) ///
		addplot(pci 0 `tau' 2 `tau', lc(red) lp(solid) lw(thick)) ///
		text(1 .3 "pval=`pv'", place(ne) size(small)) ///
		text(1.75 `tau' "{&tau}=`tau'",place(ne) size(small)) ///
		name(`fam'3) xlabel(-.6(.1).6) xsca(r(-.6 .6)) legend(off)
}
restore

* Two-column panel, short-term on the left and long-term on the right.
* The graph outcome2 is drawn above but is not part of this panel.
graph combine monitoring2 monitoring3 effort2 effort3 input2 input3, ///
	title("Education (weighted indices)") col(2)

* Close the log and end the do-file.
log close
clear
exit